; Program header of a DOS .COM image (loaded at offset 100h). NASM syntax, 16-bit code.
; The first two code bytes double as the top half of the ray origin's Z float.
NPLANES equ 9   ; number of gem planes generated each frame and tested per ray

org 100h        ; assume al=0 bx=0 sp=di=-2 si=0100h bp=09??h; last 16 bytes of PSP = 0
   db 0x04,0xC3 ; executes as "add al,0C3h" (opcode 04h ib, not mov); AL is reloaded at START
RO equ $-4*3    ; ray origin = 3 floats ending in the 2 bytes above: dd 0.0, 0.0, ~-128.0
                ; NOTE(review): bytes 00 00 04 C3 decode to -132.0 rather than exactly -128.0
  jmp START     ; skip over the constants below

; Word constants, read by the FPU through [byte Cxx + si-100h] (relies on si=100h).
C16 dw 16       ; plane distance unit, and divisor for the T/16 rotation angle
C100 dw 100     ; divisor for the T/100 rotation angle; must directly follow C16 (indexed as C16-2+bx)
C239 dw 239     ; brightness scale applied before conversion to an integer pixel value

; Entry point proper: set the video mode and prepare the pixel pointer.
; Relies on AH=0 at entry so that int 10h performs "set video mode" with AL=13h.
START:
;Video mode + palette: 4 bits orange * 4 bits blue. Uses default index 0 (black).
;NOTE: the palette loop below is commented out, so only the mode set is performed here.
  mov al,13h
P:int 10h       ; set video mode | set palette index: bx=i dh=R ch=G cl=B

;; Obligatory orange*blue palette.
;  pusha
;  inc bx
;  mov al,bl
;  aam 16        ; ax = ....rrrr....bbbb
;  imul dx,ax,4
;  mov ax,1010h
;  mov cx,dx
;  add ch,cl
;  shr ch,1      ; ch=G = (R+B)/2
;  jnz P         ; dx=cx=0 bx=100h ax=1010h
;  popa

  dec di        ; di = pixel address = -3   (di=-2 assumed at entry)

;Each frame:
; Reloads dx/es, then rebuilds NPLANES plane normals, rotated by angles derived from T (=cx).
; The outer pusha/popa pair restores every register afterwards; the normals live in memory.
M:mov dx,0xA000-10-20-20-4;  ; visible pixels are A0000..AF9FF: want X=0 Y=0 in the center of the screen
  mov es,dx     ; dx:bx=YX:XX = es:0    (dx and bx are neighbors after PUSHA)

;Generate N gem normals at [bp+200h,300h]: 0 0 1 d=16, +-1 +-1 +-1 d=32
; BIG names the byte just before the pusha opcode; the word at BIG (code bytes) is
; later loaded with fild as the Z component of the ray direction.
BIG equ $-1
  pusha         ; save all registers; with sp=-2 the pushed cx (T) lands at [-6]
  xor ax,ax     ; ax = plane index, starting at 0
  fld1
  fldz
  fldz          ;|0 0 1 : components of plane 0 before rotation (st0 is stored at +0, st1 at +4, st2 at +8)
PL:
  add bp,si     ; advance bp by si (100h) to the next normal slot
  pusha         ; save ax (plane index), bx and di for this plane
  mov bl,4      ; two rotation passes: bx=4 then bx=2 (bh is expected to be 0 here)
RR:
  ; The two comment columns below describe the first and the second pass respectively.
  call STORE                   ;|y x z         ;|x Y Z
  fild word[di-3] ;di=-3       ;|T
  fidiv word[byte C16-2+bx+si-100h] ;|T/100         ;|T/16
  call ROTATE                  ;|Y,Z=R(y,z)    ;|XX,ZZ=r(x,Z)
  fld dword[bp+si+4]           ;|x Y Z         ;|Y XX ZZ
  dec bx
  dec bx
  jnz RR
  fxch st1
  call NORMALIZE ; stores the vector, then leaves it rescaled by 1/length on the FPU stack
  call STORE     ; write the unit normal to [bp+si]
; Push the three +-1 components used by the NEXT plane: one per low bit of ax,
; negated when the bit is set. The loop runs three times because di counts -3 -> 0.
GG:         ; di=-3
  fld1      ;|a b c -> b c a
  shr ax,1
  jnc GNC
  fchs
GNC:
  inc di
  jnz GG
  popa          ; restore ax (plane index), bx, di; bp keeps the value set by add bp,si
  inc ax
  cmp al,NPLANES
  jne PL
  popa          ; restore frame registers (bp back to its base value, dx, cx=T, ...)
                ; the three values pushed after the last plane stay on the FPU stack until fninit at X2

;Each pixel:
; Traces one ray, converts the result to a dithered gray pixel and stores it at es:di.
; dx:bx is the screen position, advanced by 0CCCDh per pixel; cx = frame counter T.
; After 65536 pixels (bx back to 0 with carry) the frame ends, T is incremented and the
; keyboard port is polled; the program leaves the loop when the byte read equals 1.
X:   ;cx=T di=adr_pixel(init=0) bp=09?? si=0100 ah=0   ; cf=0
  inc dx        ; reached only when the add to bx below carried: propagate into dx
X2:
  fninit        ; adr:     -18 -16 -14 -12 -10  -8  -6  -4  -2
  pusha         ; stack:    di  si  bp  sp  bx  dx  cx  ax   0
  xor bx,bx     ; s16:  pixadr 100 9??  -2  ..X..Y  T  result

;Compute ray direction.
; NOTE(review): Z is whatever word the code bytes at BIG form; the "27000" figure below
; may be stale (the bytes look like C2h,60h = 24770) — confirm against the assembled binary.
  fild word[byte BIG + si-100h] ; Z=27000
  fild word[bx-8]  ; Y = pushed dx
  fild word[bx-9]  ; X = unaligned word: high byte of pushed bx + low byte of pushed dx   |rD.xyz p.d
  call NORMALIZE ;bx=bp
  call STORE       ; rd (unit ray direction) -> [bp+si]

;Hit the gem.
  mov dx,RO-0x100  ; dx = ray origin pointer, biased by -100h because every access adds si
  call GEM_OUTER   ;|ro[dx] rd[bp] --> cf=1_if_hit di=address_of_hit_facet   ; clobbers ax,bx,cx
  jnc SKY          ; miss: shade the sky with the primary ray direction
  xchg ax,di   ; ax=adr_facet
  xchg ax,bp   ; bp=adr_facet, ax=rd
  call REFLECT  ;REFLECT:  ; i[ax] n[bp] --> { r.x y z }
  xchg ax,bp   ; bp=rd again, ax=adr_facet
  call STORE   ; overwrite rd with the reflected direction

;  mov [bx],di    ; [bx]=pushed_ax
;  fild word[bx]
;  jc D

;Hit the sky.
; Brightness is the squared Y component of the vector at [bp+si] (primary or reflected ray).
SKY:
  fld  dword[bp+si+4]
  fmul st0
;  fmul st0         ;|rd.y^2 and skip gamma correction (so actually ^4)
  db 3Dh  ; cmp ax,NN: 1-byte "jmp D_NOSQRT"  (opcode 3Dh swallows the 2-byte fsqrt as its immediate)

;Do gamma correction and convert to 0..255.
; Label D is only a target of the commented-out "jc D" above, so fsqrt is currently never executed.
D:fsqrt            ;|0..1
D_NOSQRT:
  fimul word[byte C239 + si-100h]
  fistp word[-4] ; pushed ax
  popa           ; ax = brightness; every other register is restored

; 4-bit builtin gray palette with cheapo (6$) dithering. (The multiplier must be 239, not 255.)
; With at most 239 (0EFh) the high nibble is <= 14, so the +1 dither carry cannot overflow 15.
  ror al,4       ; swap nibbles: the low 4 bits become the value compared against bl
  cmp bl,al      ; cf=1 when bl < al (unsigned): bl acts as the dither threshold
;  push bx    ; better noise
;  sub bx,cx
;  imul bx,dx
;  cmp bh,al
  adc al,0       ; round up when the threshold test set carry
  and al,0Fh     ; keep the 4-bit gray level
  add al,16      ; palette indices 16..31
;  pop bx


;; Doubled pixels: fast version.
;  stosb
;  add bx,0xCCCD
;  adc dx,0

  stosb         ; write the pixel to es:di, di++
  add bx,0xCCCD ;dx:bx = YXX += 0000CCCD
  jnc X2        ; no carry: same dx, next pixel
  jnz X   ;do 65536 iterations   (carry with bx=0 happens only after 65536 additions)

  inc cx  ; T++
  in al,60h     ; read a byte from port 60h (keyboard data port on PC hardware)
  dec al
  jnz M         ; continue with the next frame unless the byte was 1 (Esc make code)
 ;ret     ; fallthrough
 ; Execution continues into NORMALIZE and leaves through its final ret.
 ; NOTE(review): presumably this returns through the 0 word at the stack top (sp=-2) to PSP:0 — confirm.

; NORMALIZE: pops a 3-vector from the FPU stack, saves it (unnormalized) at [bp+si],
;            and leaves the same vector scaled by 1/length on the FPU stack.
;   In:   st0,st1,st2 = a.x,a.y,a.z ; bp = destination slot ; si = slot bias (100h)
;   Out:  st0,st1,st2 = unit vector ; bx = bp ; the memory at [bp+si] holds the UNNORMALIZED a
; LOAD_SCALE:    st0 = k  ->  st0,st1,st2 = k * vector stored at [bp+si]   (k is consumed)
; LOAD_SCALE_XZ: same tail, entered with the middle value already in st0 and k in st1;
;                also used by ROTATE_CS for its products.
; The main loop also falls through into this routine when the program ends.
NORMALIZE:  ; { a.x .y .z } --> { n.x .y .z } a[bp](unnormalized) bx=bp
  call STORE        ;|      ; [bp]=a (unnormalized), FPU stack popped three times
  mov bx,bp         ; both DOT operands = a
  call DOT          ;|a*a   ; [bp]=a (unnormalized)
  fsqrt             ;|len
  fld1              ;|1 len
  fdivrp st1        ;|rsqrt(a*a)        ...    will be: |rd.x rd.y rd.z
LOAD_SCALE: ; { k } a[bp] --> { k*x k*y k*z }
  fld dword[bp+si+4]
  fmul st1           ;|ky k
LOAD_SCALE_XZ:
  fld dword[bp+si+8]
  fmul st2           ;|kz ky k
  fxch st2           ;|k ky kz
  fmul dword[bp+si]  ;|kx ky kz
  ret

; ROTATE: rotates the (x,z) pair stored at [bp+si] / [bp+si+8] by the angle in st0.
;   In:   st0 = angle (radians, as consumed by fsincos) ; bp = vector slot ; si = slot bias
;   Out:  st0 = s*x + c*z , st1 = s*z - c*x   (s = sin, c = cos) ; memory is not modified
; ROTATE_CS: alternative entry with st0 = c and st1 = s already on the stack.
ROTATE:  ; { angle } a[bp] --> { Rx Rz }
  fsincos            ;| c s
ROTATE_CS:  ; { c s } a[bp] --> { Rx Rz }
  call LOAD_SCALE_XZ ;| sx c sz
  call LOAD_SCALE_XZ ;| cx sx cz sz
  fsubp st3,st0      ;| sx cz sz-cx
  faddp              ;| sx+cz sz-cx
  ret

; STORE: pops three values from the FPU stack into the 3-float vector at [bp+si].
;   In:   st0,st1,st2 = x,y,z ; bp = vector slot ; si = slot bias
;   Out:  [bp+si]=x, [bp+si+4]=y, [bp+si+8]=z (32-bit floats) ; FPU stack is three entries shorter
;   CPU registers are not modified.
STORE: ; { a.x .y .z } --> a[bp]
  fstp dword[bp+si]
  fstp dword[bp+si+4]
  fstp dword[bp+si+8]
  ret

; DOT: pushes the dot product of two 3-float vectors onto the FPU stack.
;   In:   bp = vector a, bx = vector b (both addressed with the +si bias)
;   Out:  st0 = a.x*b.x + a.y*b.y + a.z*b.z ; earlier FPU stack entries move down by one
;   Memory and CPU registers are not modified.
DOT:  ; a[bp] b[bx] --> { (a dot b) }
  fld dword[bp+si]
  fmul dword[bx+si]     ;|ax*bx
  fld dword[bp+si+4]
  fmul dword[bx+si+4]   ;|ay*by ax*bx
  faddp                 ;|ax*bx+ay*by
  fld dword[bp+si+8]
  fmul dword[bx+si+8]
  faddp                 ;|a dot b
  ret

; v3 reflect(v3 i, v3 n) { return i - 2*n*(i|n); }
; REFLECT: pushes the reflection of i about the normal n onto the FPU stack.
;   In:   ax = incident vector i, bp = normal n (both addressed with the +si bias)
;   Out:  st0,st1,st2 = r.x,r.y,r.z ; ax, bx and bp hold their entry values again on return
; Falls through into SUM to add i to the scaled normal.
REFLECT:  ; i[ax] n[bp] --> { r.x y z }
  xchg ax,bx      ; bx = i for DOT; the caller's bx is parked in ax
  call DOT ;|i*n    ;i[bx] n[bp]
  fadd st0        ;|2(i*n)
  fchs            ;|-2(i*n)
  call LOAD_SCALE ;|(n * -2(i*n)).x .y .z
  xchg ax,bx      ; ax = i again, bx restored
;  jmp SUM

;LOAD_SCALE_SUM:  
;RAY_PARAM: ; { t } rd[bp] ro[ax] --> { (rd*t + ro).xyz }
;  call LOAD_SCALE ;|rd*t
; SUM: adds the vector stored at [ax+si] to the 3-vector on the FPU stack, in place.
;   In:   st0,st1,st2 = a.x,a.y,a.z ; ax = vector b
;   Out:  st0,st1,st2 = a+b ; bp is used temporarily and restored
SUM:  ; {a.x y z} b[ax] --> {(a+b).x .y .z}
  xchg ax,bp            ; address b through bp
  fadd dword[bp+si]     ; x += b.x
  fld dword[bp+si+4]
  faddp st2,st0         ; y += b.y
  fld dword[bp+si+8]
  faddp st3,st0         ; z += b.z
  xchg ax,bp            ; restore bp and ax
  ret

; GEM_OUTER: intersects a ray with the convex gem formed by NPLANES planes.
;   In:   dx = ray origin ro, bp = ray direction rd (both addressed with the +si bias);
;         plane normals at [bp+2*si], [bp+3*si], ... (one slot per plane)
;   Out:  cf=1 if the ray hits (tfront < tback after all planes), cf=0 on early exit;
;         di = value of bx for the last front-facing plane that raised tfront, so the
;         hit normal is at [di+si]; di is left unchanged if no plane raised tfront
;   Clobbers ax, bx, cx; the FPU stack depth is the same on return as on entry.
; Tracks tfront = largest entry distance and tback = smallest exit distance.
GEM_OUTER: ; ro[dx] rd[bp] --> cf=1_if_hit di=address_of_hit_facet   ; clobbers ax,bx,cx
  fild dword[si]  ;|tback=huge : the 4 bytes at [si] (code at 100h) read as a 32-bit integer
  fldz            ;|tfront=0 tback=huge
  mov cx,NPLANES
  lea bx,[bp+si]  ; bx = current gem; gem normals are at [bp+200h,300h,...]
G:
  fild word[byte C16 + si-100h] ; plane 0 has distance 16, others have 32
  cmp cl,NPLANES  ; first iteration (cx still NPLANES) = plane 0
  je GNZ
  fadd st0        ;|pd tf tb
GNZ:
; generic ray-plane intersection
  call DOT        ;|D=pn*rd pd tf tb
  xchg bp,dx      ; temporarily address the ray origin through bp
  call DOT        ;|pn*ro D pd tf tb
  xchg bp,dx
  fsubp st2,st0   ;|D N=pd-pn*ro
  ftst            ; compare D with 0
  fnstsw ax
  sahf       ; cf=1 if we're in front of the plane (D < 0)
  fdivp st1,st0   ;|t=N/D tf tb
  jnc GBACK
GFRONT:
  fcom st0,st1    ; t vs tf
  fnstsw ax
  sahf
  jbe GNEXT         ;if t>tf { tf=t; di=hit_address = current; }
  fst st1
  mov di,bx
  jmp GNEXT
GBACK:
  fcom st0,st2    ; t vs tb
  fnstsw ax
  sahf
  jae GNEXT        ;if t<tb { tb=t; }
  fst st2
GNEXT:
  fstp st0        ;|tf tb
  fcom            ; tf vs tb
  fnstsw ax
  sahf              ;if tf>=tb { no_hit: cf=0; early exit } else { cf=1 }
  jae GEXIT
  lea bx,[bx+si] ; don't set flags (advance to the next plane slot)
  loop G         ; loop leaves the flags untouched too, so cf=1 survives to the ret
GEXIT:              ;[di+si] = facehit
  fcompp         ; pop tf and tb; CPU flags are not affected
  ret

;int ray_gem_outer(ray const& r, v3* hitpos) {
;  float tfront=0, tback=INFf; int face=0;
;  for (int ip=0; ip<LEN(p); ip++) {
;    hit h = ray_plane(r, p[ip]);
;    if (h.front) { if (h.t>tfront) { tfront=h.t; face=ip; } }
;    else         { if (h.t<tback) { tback=h.t; } }
;  }
;  if (tfront<tback) {  // hit!
;    *hitpos = r.o + r.d*tfront;
;    return face;
;  }
;  else return -1;
;}

;float ray_gem_inner(ray& r, int face_excluded) {
;  if (depth>depth_max) return ray_background(r);
;
;  depth++;
;  float t=INFf; int face=0;
;  for (int ip=0; ip<LEN(p); ip++) if (ip != face_excluded) {
;    hit h = ray_plane(r, p[ip]);
;    if (h.t>0 && h.t<t) { t=h.t; face=ip; }
;  }
;
;  v3 hitpos = r.o + r.d*t;
;  float R = fresnel(1.f/ior, r.d|p[face].n);
;  ray rr{hitpos, reflect(r.d, -p[face].n)};
;  float color = R * ray_gem_inner(rr, face);
;  if (R < 1) {
;    ray rt{hitpos, refract(r.d, -p[face].n, ior)};
;    color += (1-R) * ray_background(rt);
;  }
;  depth--;
;  return color * exp(-t * absorptivity_gem);
;}
